import os
from IPython.display import display, HTML
ModuleFolder='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Py-Databases-SQL-DS\\IBM\\NBs\\'
os.chdir(ModuleFolder)
import os
ModuleFolder='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Py-Databases-SQL-DS\\IBM\\NBs\\M03'
os.chdir(ModuleFolder)
#Install & load sqlite3

#!pip install sqlite3  ##Uncomment this code only if you are working in a local environment to install sqlite3
import sqlite3
# Connecting to sqlite
# connection object
conn = sqlite3.connect('INSTRUCTOR.db')

Cursor class is an instance using which you can invoke methods that execute SQLite statements, fetch data from the result sets of the queries. You can create Cursor object using the cursor() method of the Connection object/class.

# cursor object
cursor_obj = conn.cursor()

Task 2: Create a table in the database

In this step we will create a table in the database with following details:

Before creating a table, let's first check if the table already exists or not. To drop the table from a database, use the DROP query. A cursor is an object that helps execute the query and fetch the records from the database.

# Drop the table if already exists.
cursor_obj.execute("DROP TABLE IF EXISTS INSTRUCTOR")
<sqlite3.Cursor at 0x1baeea82c40>

Dont worry if you get this error:

If you see an exception/error similar to the following, indicating that INSTRUCTOR is an undefined name, that's okay. It just implies that the INSTRUCTOR table does not exist in the table - which would be the case if you had not created it previously.

Exception: \[IBM]\[CLI Driver]\[DB2/LINUXX8664] SQL0204N "ABC12345.INSTRUCTOR" is an undefined name. SQLSTATE=42704 SQLCODE=-204

# Creating table
table = """ create table IF NOT EXISTS INSTRUCTOR(ID INTEGER PRIMARY KEY NOT NULL, FNAME VARCHAR(20), LNAME VARCHAR(20), CITY VARCHAR(20), CCODE CHAR(2));"""
 
cursor_obj.execute(table)
 
print("Table is Ready")
Table is Ready

Task 3: Insert data into the table

In this step we will insert some rows of data into the table.

The INSTRUCTOR table we created in the previous step contains 3 rows of data:

We will start by inserting just the first row of data, i.e. for instructor Rav Ahuja

cursor_obj.execute('''insert into INSTRUCTOR values (1, 'Rav', 'Ahuja', 'TORONTO', 'CA')''')
<sqlite3.Cursor at 0x1baeea82c40>

The output you will get something as: sqlite3.Cursor at 0x27a1a491260 which means mySql database has sqlite3.Cursor object at 0x27a1a49126 as output in table. But you may get the different number.

Now use a single query to insert the remaining two rows of data

cursor_obj.execute('''insert into INSTRUCTOR values (2, 'Raul', 'Chong', 'Markham', 'CA'), (3, 'Hima', 'Vasudevan', 'Chicago', 'US')''')
<sqlite3.Cursor at 0x1baeea82c40>

Task 4: Query data in the table

In this step we will retrieve data we inserted into the INSTRUCTOR table.

statement = '''SELECT * FROM INSTRUCTOR'''
cursor_obj.execute(statement)

print("All the data")
output_all = cursor_obj.fetchall()
for row_all in output_all:
  print(row_all)
# This is the list returned by output_all
print(output_all)
All the data
(1, 'Rav', 'Ahuja', 'TORONTO', 'CA')
(2, 'Raul', 'Chong', 'Markham', 'CA')
(3, 'Hima', 'Vasudevan', 'Chicago', 'US')
[(1, 'Rav', 'Ahuja', 'TORONTO', 'CA'), (2, 'Raul', 'Chong', 'Markham', 'CA'), (3, 'Hima', 'Vasudevan', 'Chicago', 'US')]
## Fetch few rows from the table
statement = '''SELECT * FROM INSTRUCTOR'''
cursor_obj.execute(statement)
  
print("All the data")
# If you want to fetch few rows from the table we use fetchmany(numberofrows) and mention the number how many rows you want to fetch
output_many = cursor_obj.fetchmany(2) 
for row_many in output_many:
  print(row_many)
All the data
(1, 'Rav', 'Ahuja', 'TORONTO', 'CA')
(2, 'Raul', 'Chong', 'Markham', 'CA')
# Fetch only FNAME from the table
statement = '''SELECT FNAME FROM INSTRUCTOR'''
cursor_obj.execute(statement)
  
print("All the data")
output_column = cursor_obj.fetchall()
for fetch in output_column:
  print(fetch)
All the data
('Rav',)
('Raul',)
('Hima',)

Bonus: now write and execute an update statement that changes the Rav's CITY to MOOSETOWN</strong>

query_update='''update INSTRUCTOR set CITY='MOOSETOWN' where FNAME="Rav"'''
cursor_obj.execute(query_update)
<sqlite3.Cursor at 0x1baeea82c40>
statement = '''SELECT * FROM INSTRUCTOR'''
cursor_obj.execute(statement)
  
print("All the data")
output1 = cursor_obj.fetchmany(2)
for row in output1:
  print(row)
All the data
(1, 'Rav', 'Ahuja', 'MOOSETOWN', 'CA')
(2, 'Raul', 'Chong', 'Markham', 'CA')

Task 5: Retrieve data into Pandas

In this step we will retrieve the contents of the INSTRUCTOR table into a Pandas dataframe

#!pip install pandas
import pandas as pd
#retrieve the query results into a pandas dataframe
df = pd.read_sql_query("select * from instructor;", conn)

#print the dataframe
df
ID FNAME LNAME CITY CCODE
0 1 Rav Ahuja MOOSETOWN CA
1 2 Raul Chong Markham CA
2 3 Hima Vasudevan Chicago US
#print just the LNAME for first row in the pandas data frame
df.LNAME[0]
'Ahuja'

Once the data is in a Pandas dataframe, you can do the typical pandas operations on it.

For example you can use the shape method to see how many rows and columns are in the dataframe

df.shape
(3, 5)

Task 6: Close the Connection

We free all resources by closing the connection. Remember that it is always important to close connections so that we can avoid unused connections taking up resources.

# Close the connection
conn.close()
#%pwd
#%ls
try:
    %load_ext sql
except Exception as E:
    %reload_ext sql
import csv, sqlite3

Selected Socioeconomic Indicators in Chicago

The city of Chicago released a dataset of socioeconomic data to the Chicago City Portal. This dataset contains a selection of six socioeconomic indicators of public health significance and a “hardship index,” for each Chicago community area, for the years 2008 – 2012.

Scores on the hardship index can range from 1 to 100, with a higher index number representing a greater level of hardship.

A detailed description of the dataset can be found on the city of Chicago's website, but to summarize, the dataset has the following variables:

  • Community Area Number (ca): Used to uniquely identify each row of the dataset

  • Community Area Name (community_area_name): The name of the region in the city of Chicago

  • Percent of Housing Crowded (percent_of_housing_crowded): Percent of occupied housing units with more than one person per room

  • Percent Households Below Poverty (percent_households_below_poverty): Percent of households living below the federal poverty line

  • Percent Aged 16+ Unemployed (percent_aged_16_unemployed): Percent of persons over the age of 16 years that are unemployed

  • Percent Aged 25+ without High School Diploma (percent_aged_25_without_high_school_diploma): Percent of persons over the age of 25 years without a high school education

  • Percent Aged Under 18 or Over 64:Percent of population under 18 or over 64 years of age (percent_aged_under_18_or_over_64): (ie. dependents)

  • Per Capita Income (per_capita_income_): Community Area per capita income is estimated as the sum of tract-level aggragate incomes divided by the total population

  • Hardship Index (hardship_index): Score that incorporates each of the six selected socioeconomic indicators

In this Lab, we'll take a look at the variables in the socioeconomic indicators dataset and do some basic analysis with Python.

Connect to the database

Let us first load the SQL extension and establish a connection with the database

The syntax for connecting to magic sql using sqllite is

%sql sqlite://DatabaseName

where DatabaseName will be your .db file

#!pip install ipython-sql
#!pip install seaborn
import seaborn as sns
%load_ext sql
The sql extension is already loaded. To reload it, use:
  %reload_ext sql
import csv, sqlite3

con = sqlite3.connect("socioeconomic.db")
cur = con.cursor()
#!pip install pandas 
%sql sqlite:///socioeconomic.db

Store the dataset in a Table

In many cases the dataset to be analyzed is available as a .CSV (comma separated values) file, perhaps on the internet. To analyze the data using SQL, it first needs to be stored in the database.
We will first read the csv files from the given url into pandas dataframes
Next we will be using the df.to_sql() function to convert each csv file to a table in sqlite with the csv data loaded in it.
import pandas
df = pandas.read_csv('https://data.cityofchicago.org/resource/jcxq-k9xf.csv')
df.to_sql("chicago_socioeconomic_data", con, if_exists='replace', index=False,method="multi")
78
# Install the 'ipython-sql' and 'prettytable' libraries using pip
#!pip install ipython-sql prettytable

# Import the 'prettytable' library, which is used to display data in a formatted table
import prettytable

# Set the default display format for prettytable to 'DEFAULT' (i.e., a simple table format)
prettytable.DEFAULT = 'DEFAULT'
You can verify that the table creation was successful by making a basic query like:
%sql SELECT * FROM chicago_socioeconomic_data limit 5;
 * sqlite:///socioeconomic.db
Done.
ca community_area_name percent_of_housing_crowded percent_households_below_poverty percent_aged_16_unemployed percent_aged_25_without_high_school_diploma percent_aged_under_18_or_over_64 per_capita_income_ hardship_index
1.0 Rogers Park 7.7 23.6 8.7 18.2 27.5 23939 39.0
2.0 West Ridge 7.8 17.2 8.8 20.8 38.5 23040 46.0
3.0 Uptown 3.8 24.0 8.9 11.8 22.2 35787 20.0
4.0 Lincoln Square 3.4 10.9 8.2 13.4 25.5 37524 17.0
5.0 North Center 0.3 7.5 5.2 4.5 26.2 57123 6.0

Problems

Problem 1

How many rows are in the dataset?
%sql SELECT COUNT(*) FROM chicago_socioeconomic_data;
 * sqlite:///socioeconomic.db
Done.
COUNT(*)
78
Click here for the solution ```python %sql SELECT COUNT(*) FROM chicago_socioeconomic_data; Correct answer: 78 ```

Problem 2

How many community areas in Chicago have a hardship index greater than 50.0?
%sql SELECT COUNT(*) as count FROM chicago_socioeconomic_data WHERE hardship_index > 50
 * sqlite:///socioeconomic.db
Done.
count
38
Click here for the solution ```python %sql SELECT COUNT(*) FROM chicago_socioeconomic_data WHERE hardship_index > 50.0; Correct answer: 38 ```

Problem 3

What is the maximum value of hardship index in this dataset?
%sql SELECT MAX(hardship_index) FROM chicago_socioeconomic_data;
 * sqlite:///socioeconomic.db
Done.
MAX(hardship_index)
98.0
Click here for the solution ```python %sql SELECT MAX(hardship_index) FROM chicago_socioeconomic_data; Correct answer: 98.0 ```

Problem 4

Which community area which has the highest hardship index?
%sql SELECT * FROM chicago_socioeconomic_data WHERE hardship_index= (SELECT MAX(hardship_index) FROM chicago_socioeconomic_data);
 * sqlite:///socioeconomic.db
Done.
ca community_area_name percent_of_housing_crowded percent_households_below_poverty percent_aged_16_unemployed percent_aged_25_without_high_school_diploma percent_aged_under_18_or_over_64 per_capita_income_ hardship_index
54.0 Riverdale 5.8 56.5 34.6 27.5 51.5 8201 98.0
Click here for the solution ```python #We can use the result of the last query to as an input to this query: %sql SELECT community_area_name FROM chicago_socioeconomic_data where hardship_index=98.0; #or another option: %sql SELECT community_area_name FROM chicago_socioeconomic_data ORDER BY hardship_index DESC LIMIT 1; #or you can use a sub-query to determine the max hardship index: %sql select community_area_name from chicago_socioeconomic_data where hardship_index = ( select max(hardship_index) from chicago_socioeconomic_data ); Correct answer: 'Riverdale' ```

Problem 5

Which Chicago community areas have per-capita incomes greater than $60,000?
%sql SELECT * FROM chicago_socioeconomic_data WHERE per_capita_income_ > 60000;
 * sqlite:///socioeconomic.db
Done.
ca community_area_name percent_of_housing_crowded percent_households_below_poverty percent_aged_16_unemployed percent_aged_25_without_high_school_diploma percent_aged_under_18_or_over_64 per_capita_income_ hardship_index
6.0 Lake View 1.1 11.4 4.7 2.6 17.0 60058 5.0
7.0 Lincoln Park 0.8 12.3 5.1 3.6 21.5 71551 2.0
8.0 Near North Side 1.9 12.9 7.0 2.5 22.6 88669 1.0
32.0 Loop 1.5 14.7 5.7 3.1 13.5 65526 3.0
Click here for the solution ```python %sql SELECT community_area_name FROM chicago_socioeconomic_data WHERE per_capita_income_ > 60000; Correct answer:Lake View,Lincoln Park, Near North Side, Loop ```

Problem 6

Create a scatter plot using the variables per_capita_income_ and hardship_index. Explain the correlation between the two variables.
import pandas as pd
import numpy as np
!pip install scipy
import scipy.stats
CSD=%sql SELECT * FROM chicago_socioeconomic_data;
df=pd.DataFrame(CSD)
j=sns.jointplot(x='per_capita_income_',y='hardship_index',data=df)
x=np.array(df['per_capita_income_'])
y=np.array(df['hardship_index'])
X=x[np.isnan(x)==False]
Y=y[np.isnan(y)==False]
minl=min([len(X),len(Y)])
X=X[0:minl]
Y=Y[0:minl]
#print(np.isnan(X))
#print(np.isnan(Y))
r,p=scipy.stats.pearsonr(X,Y)
#j.annotate(stats.pearsonr)
# if you choose to write your own legend, then you should adjust the properties then
phantom, = j.ax_joint.plot([], [], linestyle="", alpha=0)
j.ax_joint.legend([phantom],['r={:.3f}, p={:.4f}'.format(r,p)])

#j.ax_joint.legend([phantom],[f'r={round(r,3)}, p={float(p)}'])
Requirement already satisfied: scipy in c:\users\gamaliel\anaconda3\lib\site-packages (1.13.1)
Requirement already satisfied: numpy<2.3,>=1.22.4 in c:\users\gamaliel\anaconda3\lib\site-packages (from scipy) (1.26.4)
 * sqlite:///socioeconomic.db
Done.
<matplotlib.legend.Legend at 0x1baf4ae7bf0>
Click here for the solution ```python # if the import command gives ModuleNotFoundError: No module named 'seaborn' # then uncomment the following line i.e. delete the # to install the seaborn package # !pip install seaborn !pip install matplotlib seaborn income_vs_hardship = %sql SELECT per_capita_income_, hardship_index FROM chicago_socioeconomic_data; plot = sns.jointplot(x='per_capita_income_',y='hardship_index', data=income_vs_hardship.DataFrame()) Correct answer:You can see that as Per Capita Income rises as the Hardship Index decreases. We see that the points on the scatter plot are somewhat closer to a straight line in the negative direction, so we have a negative correlation between the two variables. ```

Conclusion

Remember that equation for the pearson correlation in

\begin{equation*} r = \frac{ \sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y}) }{% \sqrt{\sum_{i=1}^{n}(x_i-\bar{x})^2}\sqrt{\sum_{i=1}^{n}(y_i-\bar{y})^2}} \end{equation*}

Chicago Public Schools - Progress Report Cards (2011-2012)

The city of Chicago released a dataset showing all school level performance data used to create School Report Cards for the 2011-2012 school year. The dataset is available from the Chicago Data Portal: https://data.cityofchicago.org/Education/Chicago-Public-Schools-Progress-Report-Cards-2011-/9xs2-f89t

This dataset includes a large number of metrics. Start by familiarizing yourself with the types of metrics in the database: https://data.cityofchicago.org/api/assets/AAD41A13-BE8A-4E67-B1F5-86E711E09D5F?download=true

NOTE:

Do not download the dataset directly from City of Chicago portal. Instead download a static copy which is a more database friendly version from this link.

Now review some of its contents.

Connect to the database

Let us now load the ipython-sql extension and establish a connection with the database

The syntax for connecting to magic sql using sqllite is

%sql sqlite://DatabaseName

where DatabaseName will be your .db file

import csv, sqlite3

con = sqlite3.connect("CPS.db")
cur = con.cursor()
#!pip install pandas
#!pip install ipython-sql prettytable

import prettytable
prettytable.DEFAULT = 'DEFAULT'
#!pip install ipython-sql
# Extension for working with SQL 
%load_ext sql
The sql extension is already loaded. To reload it, use:
  %reload_ext sql
#The syntax for connecting to magic sql using sqllite is
#%sql sqlite://DatabaseName 

%sql sqlite:///RealWorldData.db

Store the dataset in a Table

In many cases the dataset to be analyzed is available as a .CSV (comma separated values) file, perhaps on the internet.
To analyze the data using SQL, it first needs to be stored in the database.
We will first read the csv files from the given url into pandas dataframes
Next we will be using the df.to_sql() function to convert each csv file to a table in sqlite with the csv data loaded in it.
import pandas as pd
lk1='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DB0201EN-SkillsNetwork/labs/FinalModule_Coursera_V5/data/ChicagoPublicSchools.csv'
lk2='https://data.cityofchicago.org/resource/9xs2-f89t.csv'
df=pd.read_csv(lk2)
df.iloc[0:2,:]
#df=pd.DataFrame(csv_pandas)
school_id name_of_school elementary_or_high_school street_address city state zip_code phone_number link_ network_manager ... rcdts_code x_coordinate y_coordinate latitude longitude community_area_number community_area_name ward police_district location
0 610038 Abraham Lincoln Elementary School ES 615 W Kemper Pl Chicago IL 60614 (773) 534-5720 http://schoolreports.cps.edu/SchoolProgressRep... Fullerton Elementary Network ... 150000000000000 1171699.458 1915829.428 41.924497 -87.644522 7 LINCOLN PARK 43 18 (41.92449696, -87.64452163)
1 610281 Adam Clayton Powell Paideia Community Academy ... ES 7511 S South Shore Dr Chicago IL 60649 (773) 535-6650 http://schoolreports.cps.edu/SchoolProgressRep... Skyway Elementary Network ... 150000000000000 1196129.985 1856209.466 41.760324 -87.556736 43 SOUTH SHORE 7 4 (41.76032435, -87.55673627)

2 rows × 79 columns

df.to_sql("CHICAGO_PUBLIC_SCHOOLS_DATA", con, if_exists='replace', index=False, method="multi")
566

Double-click here for the solution. <!--

import pandas

df = pandas.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DB0201EN-SkillsNetwork/labs/FinalModule_Coursera_V5/data/ChicagoPublicSchools.csv") df.to_sql("CHICAGO_PUBLIC_SCHOOLS_DATA", con, if_exists='replace', index=False, method="multi")

-->

Query the database system catalog to retrieve table metadata

You can verify that the table creation was successful by retrieving the list of all tables in your schema and checking whether the SCHOOLS table was created
# type in your query to retrieve list of all tables in the database
%load_ext sql
import csv, sqlite3
%sql SELECT name FROM sqlite_master WHERE type='table'
The sql extension is already loaded. To reload it, use:
  %reload_ext sql
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
Done.
name

Double-click here for a hint

Double-click here for the solution.

%sql SELECT * FROM PRAGMA_TABLE_INFO('CHICAGO_PUBLIC_SCHOOLS_DATA') limit 5;
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
Done.
cid name type notnull dflt_value pk

Query the database system catalog to retrieve column metadata

The SCHOOLS table contains a large number of columns. How many columns does this table have?
# type in your query to retrieve the number of columns in the SCHOOLS table
%sql SELECT count(name) FROM PRAGMA_TABLE_INFO('CHICAGO_PUBLIC_SCHOOLS_DATA');
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
Done.
count(name)
0

Double-click here for the solution.

Now retrieve the the list of columns in SCHOOLS table and their column type (datatype) and length.

# type in your query to retrieve all column names in the SCHOOLS table along with their datatypes and length
# The code below shows that TEXT and INTEGER have lengths 4 & 7, but not like type(varchar(20)) = 20
%sql SELECT name,type,length(type) FROM PRAGMA_TABLE_INFO('CHICAGO_PUBLIC_SCHOOLS_DATA') limit 5;
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
Done.
name type length(type)

Double-click here for the solution.

Questions

  1. Is the column name for the "SCHOOL ID" attribute in upper or mixed case?
  2. What is the name of "Community Area Name" column in your table? Does it have spaces?
  3. Are there any columns in whose names the spaces and paranthesis (round brackets) have been replaced by the underscore character "_"?

Problems

Problem 1

How many Elementary Schools are in the dataset?
%sql select count(distinct(lower(name_of_school))) as No_Schools from CHICAGO_PUBLIC_SCHOOLS_DATA as schools where elementary_or_high_school='ES'
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select count(distinct(lower(name_of_school))) as No_Schools from CHICAGO_PUBLIC_SCHOOLS_DATA as schools where elementary_or_high_school='ES']
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for another hint

Double-click here for the solution.

Problem 2

What is the highest Safety Score?
%sql select max(safety_score) from CHICAGO_PUBLIC_SCHOOLS_DATA;
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select max(safety_score) from CHICAGO_PUBLIC_SCHOOLS_DATA;]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for the solution.

Problem 3

Which schools have highest Safety Score?
#%sql select * from CHICAGO_PUBLIC_SCHOOLS_DATA where safety_score = (select max(safety_score) from CHICAGO_PUBLIC_SCHOOLS_DATA);
#Just to count them
%sql select count(*) from CHICAGO_PUBLIC_SCHOOLS_DATA where safety_score = (select max(safety_score) from CHICAGO_PUBLIC_SCHOOLS_DATA);
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select count(*) from CHICAGO_PUBLIC_SCHOOLS_DATA where safety_score = (select max(safety_score) from CHICAGO_PUBLIC_SCHOOLS_DATA);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Problem 4

What are the top 10 schools with the highest "Average Student Attendance"?
%sql select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance DESC limit 10
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance DESC limit 10]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Problem 5

Retrieve the list of 5 Schools with the lowest Average Student Attendance sorted in ascending order based on attendance
%sql select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance limit 5
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance limit 5]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Problem 6

Now remove the '%' sign from the above result set for Average Student Attendance column
sql select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance limit 5
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA order by average_student_attendance limit 5]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for the solution.

Problem 7

Which Schools have Average Student Attendance lower than 70%?
sql select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA where average_student_attendance < 70
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select name_of_school, average_student_attendance from CHICAGO_PUBLIC_SCHOOLS_DATA where average_student_attendance < 70]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for another hint

Double-click here for the solution.

Problem 8

Get the total College Enrollment for each Community Area
sql select community_area_name, round(sum(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name order by sum(college_enrollment_rate) DESC limit 10
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select community_area_name, round(sum(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name order by sum(college_enrollment_rate) DESC limit 10]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for another hint

Double-click here for the solution.

Problem 9

Get the 5 Community Areas with the least total College Enrollment sorted in ascending order
sql select community_area_name, round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name order by SUM(college_enrollment_rate) ASC limit 5
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select community_area_name, round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name order by SUM(college_enrollment_rate) ASC limit 5]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for a hint

Double-click here for the solution.

Problem 10

List 5 schools with lowest safety score.
sql select name_of_school, safety_score from CHICAGO_PUBLIC_SCHOOLS_DATA order by safety_score asc limit 5
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CHICAGO_PUBLIC_SCHOOLS_DATA
[SQL: select name_of_school, safety_score from CHICAGO_PUBLIC_SCHOOLS_DATA order by safety_score asc limit 5]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Problem 11

Get the hardship index for the community area of the school which has College Enrollment of 4368
df = pd.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DB0201EN-SkillsNetwork/labs/FinalModule_Coursera_V5/data/ChicagoCensusData.csv")
df.to_sql("CENSUS_DATA", con, if_exists='replace', index=False,method="multi")
78
#sql select * from CENSUS_DATA CD, CHICAGO_PUBLIC_SCHOOLS_DATA CPS limit 2
#%sql select college_enrollment_rate from CHICAGO_PUBLIC_SCHOOLS_DATA CPS
%%sql
select CPS.name_of_school, CD.hardship_index from CENSUS_DATA CD, CHICAGO_PUBLIC_SCHOOLS_DATA CPS 
where CD.community_area_number = CPS.community_area_number 
and college_enrollment_rate = (select round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name)
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CENSUS_DATA
[SQL: select CPS.name_of_school, CD.hardship_index from CENSUS_DATA CD, CHICAGO_PUBLIC_SCHOOLS_DATA CPS 
where CD.community_area_number = CPS.community_area_number 
and college_enrollment_rate = (select round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name)]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Problem 12

Get the hardship index for the community area which has the highest value for College Enrollment
%%sql
select CPS.name_of_school, CD.hardship_index from CENSUS_DATA CD, CHICAGO_PUBLIC_SCHOOLS_DATA CPS 
where CD.community_area_number = CPS.community_area_number 
and college_enrollment_rate = (select round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name)
 * sqlite:///RealWorldData.db
   sqlite:///socioeconomic.db
(sqlite3.OperationalError) no such table: CENSUS_DATA
[SQL: select CPS.name_of_school, CD.hardship_index from CENSUS_DATA CD, CHICAGO_PUBLIC_SCHOOLS_DATA CPS 
where CD.community_area_number = CPS.community_area_number 
and college_enrollment_rate = (select round(SUM(college_enrollment_rate),2) from CHICAGO_PUBLIC_SCHOOLS_DATA group by community_area_name)]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Double-click here for the solution.

Summary

In this lab you learned how to work with a real word dataset using SQL and Python. You learned how to query columns with spaces or special characters in their names and with mixed case names. You also used built in database functions and practiced how to sort, limit, and order result sets, as well as used sub-queries and worked with multiple tables.

Saving

import os
FromFld='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Py-Databases-SQL-DS\\Mine\\'
os.chdir(FromFld)
try:
    !jupyter nbconvert SQL-Py-Notes.ipynb --to html --template pj
except Exception as e:
    print('HTML not stored')
import shutil
import os



#file2=Tofld+'P4DSNotes.html'
# The line above copies files from A -> B
#shutil.copy(os.path.join(FromFld,fileh), Tofld)
# The line above copies all the content from A -> B
#shutil.copytree(FromFld, Tofld)
import shutil
FromFld='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Py-Databases-SQL-DS\\Mine\\'
Tofld='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\IBM_DS_Jupyter_Tasks\\Python4DataScience\\'
SQL-Py-Notes.html deleted in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\
SQL-Py-Notes.html replaced in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\
# NB
fileh='SQL-Py-Notes.ipynb'
try:
    if os.path.isfile(Tofld+'/'+fileh):
        os.remove(Tofld+'/'+fileh)
        print(fileh, 'deleted in', Tofld)
        shutil.copy(os.path.join(FromFld,fileh),os.path.join(Tofld,fileh))
        print(fileh, 'copied in', Tofld)
    else:
        shutil.copy(os.path.join(FromFld,fileh),os.path.join(Tofld,fileh))
        print(fileh, 'copied in', Tofld)
except Exception as e:
    print('HTML not moved')
SQL-HTMLs.ipynb deleted in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\
SQL-HTMLs.ipynb copied in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\
# HTML
fileh='SQL-Py-Notes.html'
try:
    if os.path.isfile(Tofld+'/'+fileh):
        os.remove(Tofld+'/'+fileh)
        print(fileh, 'deleted in', Tofld)
        shutil.move(os.path.join(FromFld,fileh),os.path.join(Tofld,fileh))
        print(fileh, 'replaced in', Tofld)
    else:
        shutil.move(os.path.join(FromFld,fileh),os.path.join(Tofld,fileh))
        print(fileh, 'written in', Tofld)
except Exception as e:
    print('HTML not moved')
txtfl="C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Py-Databases-SQL-DS\\IBM\\SQL-Relted\\IBMDBb2\\IBMDB.txt"
with open(txtfl, "r") as file1:
    FileContent = file1.read()
#IBM DB    print(FileContent)
#import ibm_db
database= "bludb"
hostname="19af6446-6171-4641-8aba-9dcff8e1b6ff.c1ogj3sd0tgtu0lqde00.databases.appdomain.cloud"
port= "30699"
password= "z2bFPIx5O6BHKSgU"
username= "fhg76116"
protocol='TCPIP'
dsn=(
    "DRIVER={{IBM DB2 ODBC DRIVER}};"
    "DATABASE= bludb;"
    "HOSTNAME=19af6446-6171-4641-8aba-9dcff8e1b6ff.c1ogj3sd0tgtu0lqde00.databases.appdomain.cloud;"
    "PORT= 30699;"
    "PROTOCOL=TCPIP;"
    "UID= fhg76116;"
    "PWD= z2bFPIx5O6BHKSgU;").format("",hostname,port,username,password)

#con=ibm_db.connect(dsn,"","")